rm(list = ls())
pacman::p_load(data.table, tidyverse, haven, foreign, sf, fixest, car, systemfit, 
               modelsummary, kableExtra, viridis, wesanderson, egg)

dir = dirname(rstudioapi::getSourceEditorContext()$path)
setwd(dir)

sf::sf_use_s2(FALSE)
options(modelsummary_factory_default = 'kableExtra')

gen.col.index <- function(dim){
  x <- 1:dim
  out <- c("\u00A0", paste(" (",x,") ",sep=""))
  return(out)
}

fix.format <- function(df){
  df <- gsub("&lt;0.01","0.01",df)
  df <- gsub(" NA ","", df)
  df <- gsub("−","-", df)
  return(df)
}

label <- fread("Data/question_mapping.csv")

################################################################################
### Table 3: Election outcomes for J-assigned candidates #######################

d <- read_dta("Data/descriptive.dta") 

gen.desc <- function(yr){
  x <- d %>% filter(year==yr)
  
  n <- dim(x)[1]
  x <- x %>% filter(samp==1)

  out_1 <- feols(winner_J-0.5 ~ 1, data=x, notes=F)
  out_2 <- feols(rank_top_J-0.5 ~ 1, data=x, notes=F)
  
  n_samp <- out_1$nobs_origin
  mu_1 <- str_pad(round(mean(na.omit(x$winner_J)),2),4,"right","0")
  mu_2 <- str_pad(round(mean(na.omit(x$rank_top_J)),2),4,"right","0")
  
  pval <- modelsummary(list(out_1,out_2), output="data.frame", estimate="[{p.value}]{stars}", statistic=NULL, fmt=2, stars=c('***' = .01, '**' = .05, '*' = .1)) %>% select(4:5) %>% filter(row_number()==1)
  mu_1 <- paste(mu_1,pval[1])
  mu_2 <- paste(mu_2,pval[2])
  
  yr <- as.character(yr)
  n <- as.character(n)
  
  tab <- data.frame(yr, n, mu_1, mu_2) 
  tab <- tab %>% mutate(across(everything(), ~ str_replace_all(., "<0.01", "0.00")))
  return(tab)
}

out <- bind_rows(lapply(unique(d$year), gen.desc))
colnames(out) <- paste("(",1:dim(out)[2],")",sep="")
head <- c("\\\\specialcell{Year}","\\\\specialcell{Constituencies}","\\\\specialcell{P(Winner\\\\;|\\\\;$J$)}","\\\\specialcell{P($J$\\\\;|\\\\;Rank=1)}") #"p-value", ,"p-value"

tab <- datasummary_df(out, col.names=gen.col.index(4)[2:5], fmt=2, format="latex", align="lrll",escape=F,
                      title="Election outcomes for $J$-assigned candidates\\label{tab:3}",position="!htpb") %>%
  add_header_above(head, escape=F) %>% add_footnote("\\StarnoteAllElections", threeparttable = T, escape=F, notation="none") %>% 
  kable_styling(position = "center", font_size = 11) 
save_kable(tab, file = "Tables/Table 3.tex", escape=F, format="latex", position="center", booktabs=T) 

d <- d %>% filter(year %in% c(1965, 1970))

################################################################################
### Figure 2: Correlation between ADC vote share and election vote share across elections
### Figure A13: Correlation between ADC vote share and election vote share across elections (in constituencies where elite-preferred candidate is not assigned J)

p <- d %>% ggplot(aes(x=adc_rank1_vs, y=rank1_vs))+theme_bw()+
  geom_point(alpha=0.8)+geom_smooth(method="loess",span=1,color="cadetblue",fullrange=T)+facet_grid(cols=vars(year))+
  geom_hline(aes(yintercept=0.5),lty=2,colour="grey30")+
  xlab("ADC vote share")+ylab("Election vote share")+
  theme(strip.background=element_rect(fill="white"))
ggsave(plot=p, file="Figures/Figure 2.pdf",width=6,height=3.25)

p <- d %>% filter(rank_top_J==0) %>% ggplot(aes(x=adc_rank1_vs, y=rank1_vs))+theme_bw()+
  geom_point(alpha=0.8)+geom_smooth(method="loess",span=1,color="cadetblue",fullrange=T)+facet_grid(cols=vars(year))+
  geom_hline(aes(yintercept=0.5),lty=2,colour="grey30")+
  xlab("ADC vote share (J=0 constituencies)")+ylab("Election vote share")+
  theme(strip.background=element_rect(fill="white"))
ggsave(plot=p, file="Figures/Figure A13.pdf",width=6,height=3.25)

################################################################################
### Figure A4: Constituencies and instrument assignment 

shp <- read_sf("Data/shapefile.shp") %>% mutate(constituency_year=paste(constit,year))
d <- read_dta("Data/analysis_1.dta")

shp <- shp %>% mutate(Instrument = case_when(constituency_year %in% d$constituency_year[d$J==1] ~ "1 -> J",
                                             constituency_year %in% d$constituency_year[d$J==0] ~ "1 -> N"), 
                      in_samp = as.numeric(!is.na(Instrument)))

p <- ggplot(data=shp)+theme_bw()+facet_grid(cols=vars(year))+
  geom_sf(aes(fill=Instrument))+scale_fill_viridis(discrete=T, option="viridis")+
  theme(strip.background=element_rect(fill="white"), legend.position="bottom")

ggsave("Figures/Figure A4.pdf",plot=p,width=6,height=3.75)

################################################################################
### Tables 1, A1-A7, A18-A19: Candidate-level analysis and design validation ###

make.comparison <- function(dv,spec,samp){
  
  d$ranked_top <- d$rank_top
  d$winner <- d$elected
  x <- d %>% select(var=any_of(dv), rank_top, year, constituency_year, symbol_J, J, elected, z_adc_rank1_vs)
  
  store <- x
  if(samp=="All"){x <- x}
  if(samp=="1965"){x <- x%>%filter(year==1965)}
  if(samp=="1970"){x <- x%>%filter(year==1970)}
  if(samp=="Control"){x <- x%>%filter(J==0)}
  
  if(spec=="adc"){
    if(samp!="Diff"){
      mu_1 <- mean(na.omit(x$var[x$rank_top==1]))
      mu_2 <- mean(na.omit(x$var[x$rank_top==0]))
      diff <- mu_1 - mu_2
      out <- feols(var ~ rank_top | constituency_year, data=x, cluster="constituency_year", notes=F)
    }
    if(samp=="Diff"){
      x <- store %>% mutate(rank_top65 = case_when(year==1965~rank_top, T~0), 
                            rank_top70 = case_when(year==1970~rank_top, T~0))
      fmla1 <- var ~ rank_top65 + factor(year)
      fmla2 <- var ~ rank_top70 + factor(year)
      tryCatch(
        {
          fitsur <- systemfit::systemfit(list(e1 = fmla1, e2 = fmla2), data=x)
          restriction <- "e1_rank_top65 - e2_rank_top70"
          p <- linearHypothesis(fitsur, restriction, test = "Chisq")
          out <- p$`Pr(>Chisq)`[2]
        },
        error=function(cond){
          out <<- NA
        })}}
  if(spec=="election"){
    if(samp!="Diff"){
      mu_1 <- mean(na.omit(x$var[x$elected==1]))
      mu_2 <- mean(na.omit(x$var[x$elected==0]))
      diff <- mu_1 - mu_2
      out <- feols(var ~ elected | constituency_year, data=x, cluster="constituency_year", notes=F)
    }
    if(samp=="Diff"){
      x <- store %>% mutate(elected65 = case_when(year==1965~elected, T~0), 
                            elected70 = case_when(year==1970~elected, T~0))
      fmla1 <- var ~ elected65 + factor(year)
      fmla2 <- var ~ elected70 + factor(year)
      tryCatch(
        {
          fitsur <- systemfit::systemfit(list(e1 = fmla1, e2 = fmla2), data=x)
          restriction <- "e1_elected65 - e2_elected70"
          p <- linearHypothesis(fitsur, restriction, test = "Chisq")
          out <- p$`Pr(>Chisq)`[2]
        },
        error=function(cond){
          out <<- NA
        })}}
  
  if(spec=="adc_election_diff"){
    fmla1 <- var ~ rank_top
    fmla2 <- var ~ elected
    tryCatch(
      {
        fitsur <- systemfit::systemfit(list(adc = fmla1, elec = fmla2), data=x)
        restriction <- "adc_rank_top - elec_elected"
        p <- linearHypothesis(fitsur, restriction, test = "Chisq")
        out <- p$`Pr(>Chisq)`[2]
      },
      error=function(cond){
        out <<- NA
      })}
  
  if(spec=="ols"){
    mu_1 <- mean(na.omit(x$var[x$elected==1 & x$rank_top==1]))
    mu_2 <- mean(na.omit(x$var[x$elected==1 & x$rank_top==0]))
    diff <- mu_1 - mu_2
    out <- feols(var ~ rank_top | year, data=x %>% filter(elected==1), vcov="hetero", notes=F)
  }
  if(spec=="balance"){ 
    mu_1 <- mean(na.omit(x$var[x$J==1 & x$rank_top==1]))
    mu_2 <- mean(na.omit(x$var[x$J==0 & x$rank_top==1]))
    diff <- mu_1 - mu_2
    out <- feols(var ~ symbol_J | year, data=x %>% filter(rank_top==1), vcov="hetero", notes=F)
  }
  if(spec=="winner"){
    mu_1 <- mean(na.omit(x$var[x$J==1 & x$elected==1]))
    mu_2 <- mean(na.omit(x$var[x$J==0 & x$elected==1]))
    diff <- mu_1 - mu_2
    out <- feols(var ~ J | year, data=x %>% filter(elected==1), vcov="hetero", notes=F)
  }
  if(spec=="rank1_vs"){
    mu_1 <- mean(na.omit(x$var[x$z_adc_rank1_vs<0 & x$rank_top==1]))
    mu_2 <- mean(na.omit(x$var[x$z_adc_rank1_vs>0 & x$rank_top==1]))
    out <- feols(var ~ z_adc_rank1_vs | year, data=x %>% filter(rank_top==1), vcov="hetero", notes=F)
    diff <- out$coefficients[1]
  }
  if(!(spec %in% c("adc_election_diff")) & samp!="Diff"){
    n <- as.character(out$nobs)
    tab <- data.frame(N=n, mu_1, mu_2, diff)
    out <- list(out, tab)
  }
  return(out)
}

make.comparison.groups <- function(samp,t,starnote){
  
  all_1 <- lapply(vars, function(x) make.comparison(x,"adc",samp))
  all_2 <- lapply(vars, function(x) make.comparison(x,"election",samp)) 
  all_3 <- lapply(vars, function(x) make.comparison(x,"adc_election_diff",samp))
  
  info_1 <- as_tibble(bind_rows(lapply(all_1, function(x) x[2]))) %>% mutate(var=vars)
  info_2 <- as_tibble(bind_rows(lapply(all_2, function(x) x[2]))) %>% mutate(var=vars)
  info_3 <- as_tibble(unlist(all_3)) %>% mutate(var=vars) %>% 
    mutate(p = case_when(is.na(value)~"",!is.na(value)~ paste("[",format(round(value, 2)),"]",sep="")) ) %>% 
    mutate(pstar = case_when(value<0.01 ~ paste0(p,"***"),value<0.05 ~ paste0(p,"**"),value<0.1 ~ paste0(p,"*"),TRUE~p)) %>% 
    select(-value, -p)
  
  models_1 <- sapply(all_1, function(x) x[1])
  models_2 <- sapply(all_2, function(x) x[1])
  
  models_1 <- modelsummary(models_1, escape=F, fmt=2, output="data.frame", gof_omit=".*", estimate="[{p.value}]{stars}", statistic=NULL, stars=c('***' = .01, '**' = .05, '*' = .1))
  models_1 <- t(models_1) %>% as.data.frame() %>% filter(row_number()>3)
  models_1$var <- info_1$var
  models_1$V1[models_1$var=="ranked_top"] <- NA
  
  models_2 <- modelsummary(models_2, escape=F, fmt=2, output="data.frame", gof_omit=".*", estimate="[{p.value}]{stars}", statistic=NULL, stars=c('***' = .01, '**' = .05, '*' = .1))
  models_2 <- t(models_2) %>% as.data.frame() %>% filter(row_number()>3)
  models_2$var <- info_2$var
  models_2$V1[models_2$var=="elected"] <- NA
  
  tab <- info_1 %>% full_join(models_1, by="var") %>% full_join(info_2 %>% select(-N), by="var") %>% full_join(models_2, by="var") %>% 
    full_join(info_3, by="var") %>% 
    left_join(naming %>% mutate(varnames = paste("\\hspace{0.1cm}", label)) %>% select(var, varnames), by="var") %>% 
    select(-var) %>% relocate(varnames,.before=1)
  
  colnames(tab) <- c(" ","\\specialcell{(1)}","\\specialcell{(2)}","\\specialcell{(3)}","\\specialcell{(4)}","\\specialcell{(5)}",
                     "\\specialcell{(6)}","\\specialcell{(7)}","\\specialcell{(8)}","\\specialcell{(9)}","\\specialcell{(10)}")
  
  head_1 <- c(1,1,4,4,1)
  names(head_1) <- c(" "," ","A. Party elite preferences","B. Voter preferences","C. Difference")
  head_2 <- c(1,1,rep(1,8),1)
  
  names(head_2) <- c(" ","N","$\\\\mu_{1}$","$\\\\mu_{2}$","$\\\\beta^{\\\\text{ADC}}$","p($\\\\beta^{\\\\text{ADC}}$=0)",
                     "$\\\\mu_{\\\\text{Winner}}$","$\\\\mu_{\\\\text{Loser}}$","$\\\\beta^{\\\\text{Voters}}$","p($\\\\beta^{\\\\text{Voters}}$=0)",
                     "p($\\\\beta^{\\\\text{ADC}}$=$\\\\beta^{\\\\text{Voters}}$)")
  
  sub_split <- sub %>% mutate(x1="",x2="",x3="",x4="",x5="",x6="",x7="",x8="",x9="",x10="",x11="")
  
  out <- datasummary_df(tab, escape=F, align="lrrrrlrrrll", add_rows=sub, col.names=gen.col.index(10),
                        fmt=2, format="latex", stars=c('***' = .01, '**' = .05, '*' = .1), gof_map = c("nobs"), position="!htpb", title=t) %>% 
    add_header_above(header = head_2, escape=F) %>% 
    add_header_above(header = head_1, escape=F) %>%
    add_footnote(starnote, threeparttable = T, escape=F, notation="none") %>% kable_styling(position = "center", font_size = 8)
  out <- fix.format(out)
  return(out)
}

make.comparison.split <- function(spec,diff,cols,t,starnote,header_input){
  
  all <- lapply(vars, function(x) make.comparison(x,spec,"All"))
  all_65 <- lapply(vars, function(x) make.comparison(x,spec,"1965"))
  all_70 <- lapply(vars, function(x) make.comparison(x,spec,"1970"))
  
  info_all <- as_tibble(bind_rows(lapply(all, function(x) x[2])))
  info_65 <- as_tibble(bind_rows(lapply(all_65, function(x) x[2])))
  info_70 <- as_tibble(bind_rows(lapply(all_70, function(x) x[2])))
  
  if(diff==T){
    all_diff <- lapply(vars, function(x) make.comparison(x,spec,"Diff"))
    info_diff <- as_tibble(unlist(all_diff)) %>% mutate(var=vars) %>% 
      mutate(p = case_when(is.na(value)~"", !is.na(value)~ paste("[",format(round(value, 2)),"]",sep="")) ) %>% 
      mutate(pstar = case_when(value<0.01 ~ paste0(p,"***"), value<0.05 ~ paste0(p,"**"), value<0.1 ~ paste0(p,"*"), TRUE~p)) %>% 
      select(-value, -p)
  }
  
  models <- sapply(all, function(x) x[1])
  models_65 <- sapply(all_65, function(x) x[1])
  models_70 <- sapply(all_70, function(x) x[1])
  
  models <- modelsummary(models, escape=F, fmt=2, output="data.frame", gof_omit=".*", estimate="[{p.value}]{stars}", statistic=NULL, stars=c('***' = .01, '**' = .05, '*' = .1))
  models_65 <- modelsummary(models_65, escape=F, fmt=2, output="data.frame", gof_omit=".*", estimate="[{p.value}]{stars}", statistic=NULL, stars=c('***' = .01, '**' = .05, '*' = .1))
  models_70 <- modelsummary(models_70, escape=F, fmt=2, output="data.frame", gof_omit=".*", estimate="[{p.value}]{stars}", statistic=NULL, stars=c('***' = .01, '**' = .05, '*' = .1))
  
  model_sum <- bind_cols(t(models_65), t(models_70)) %>% filter(row_number()>3)
  
  if(diff==T){ tab <- bind_cols(info_65, model_sum%>%select(1), info_70, model_sum%>%select(2), info_diff) }
  if(diff==F){ tab <- bind_cols(info_65, model_sum%>%select(1), info_70, model_sum%>%select(2)) }
  
  if(spec %in% c("balance","winner","ols","rank1_vs")){ 
    model_sum <- bind_cols(t(models), t(models_65), t(models_70)) %>% filter(row_number()>3) 
    tab <- bind_cols(info_all, model_sum%>%select(1), info_65, model_sum%>%select(2), info_70, model_sum%>%select(3)) 
  }
  
  tab <- tab %>% mutate(varnames=paste("\\hspace{0.1cm}", naming$label)) %>% relocate(varnames,.before=1)
  
  if("var" %in% colnames(tab)){tab <- tab %>% select(-var)}
  colnames(tab)[1]<-" "
  
  if(diff==T){
    head_1 <- c(1,5,5,1)
    names(head_1) <- c("  ","A. 1965","B. 1970","C. Difference")
    head_2 <- c(1,rep(1,10),1)
  }
  if(diff==F){
    head_1 <- c(1,5,5)
    names(head_1) <- c("  ","A. 1965","B. 1970")
    head_2 <- c(1,rep(1,10))
  }
  if(spec %in% c("balance","winner","ols","rank1_vs")){
    head_1 <- c(1,5,5,5)
    names(head_1) <- c("  ","Pooled","1965","1970") 
    head_2 <- c(1,rep(1,15))
  }
  if(spec=="adc"){
    p65 <- c("$\\\\beta_{65}^{\\\\text{ADC}}$", "p($\\\\beta_{65}^{\\\\text{ADC}}$=0)")
    p70 <- c("$\\\\beta_{70}^{\\\\text{ADC}}$", "p($\\\\beta_{70}^{\\\\text{ADC}}$=0)")
    if(diff==T){pdiff <- "p($\\\\beta_{65}^{\\\\text{ADC}}$=$\\\\beta_{70}^{\\\\text{ADC}}$)"}
  }
  if(spec=="election"){
    p65 <- c("$\\\\beta_{65}^{\\\\text{Voters}}$", "p($\\\\beta_{65}^{\\\\text{Voters}}$=0)")
    p70 <- c("$\\\\beta_{70}^{\\\\text{Voters}}$", "p($\\\\beta_{70}^{\\\\text{Voters}}$=0)")
    if(diff==T){pdiff <- "p($\\\\beta_{65}^{\\\\text{Voters}}$=$\\\\beta_{70}^{\\\\text{Voters}}$)"}
  }
  if(spec %in% c("balance","winner","ols","rank1_vs")){
    p <- c("p-value")
    p65 <- c("p-value")
    p70 <- c("p-value")
  }
  if(spec %in% c("adc","election")){
    names(head_2) <- c(" ","N",header_input,p65, "N",header_input,p70, pdiff)
    sub_split <- sub %>% mutate(x1="",x2="",x3="",x4="",x5="",x6="")
    algn <- "lrrrrlrrrrll"
  }
  if(spec %in% c("balance","winner","ols","rank1_vs")){
    names(head_2) <- c(" ","N",header_input,p, "N",header_input,p65, "N",header_input,p70)
    sub_split <- sub %>% mutate(x1="",x2="",x3="",x4="",x5="",x6="",x7="",x8="",x9="",x10="")
    algn <- "lrrrrlrrrrlrrrrl"
  }
  
  out <- datasummary_df(tab, escape=F, align=algn, add_rows=sub_split, col.names=gen.col.index(cols),
                        fmt=2, format="latex", stars=c('***' = .01, '**' = .05, '*' = .1), gof_map = c("nobs"), position="!htpb", title=t) %>% 
    add_header_above(header = head_2, escape=F) %>% 
    add_header_above(header = head_1, escape=F) %>%
    add_footnote(starnote, threeparttable = T, escape=F, notation="none") %>% kable_styling(position = "center", font_size = 8)
  out <- fix.format(out)
  return(out)
}

################################################################################

sub_1 <- tibble(name="\\textbf{Selection outcomes}")
sub_2 <- tibble(name="\\textbf{Demographic}")
sub_3 <- tibble(name="\\textbf{Education}")
sub_4 <- tibble(name="\\textbf{Occupation}")
sub_5 <- tibble(name="\\textbf{National political roles}")
sub_6 <- tibble(name="\\textbf{Local party roles}")
sub_7 <- tibble(name="\\textbf{Election outcomes}")

vars <- c('ranked_top','adc_vs','adc_votes',
          'male','age','born_in_district','majority_religion','majority_ethnicity','trad_authority',
          'ed_vocational','ed_foreign','ed_secondary','ed_university',
          'occ_main_farmer','occ_main_teacher','occ_main_religious','occ_main_private','occ_main_bureaucrat_junior','occ_main_bureaucrat_senior',
          'incumbent','minister',
          'leader_local_elected','leader_local_appointed','leader_union_coop','tanu_duration',
          'winner','vs','votes')
naming <- data.frame(var=vars) %>% left_join(label)

################################################################################
### Table 1: Party elite and voter preferences across candidates
### Table A3: Party elite and voter preferences across candidates (J=0 constituencies)

sub <- as_tibble(data.frame(bind_rows(sub_1, sub_2, sub_3, sub_4, sub_5, sub_6, sub_7),"","","","","","","","","",""))
attr(sub, "position") <- c(1,5,12,17,24,27,32)

tab <- make.comparison.groups("All", "Party elite and voter preferences across candidates\\label{tab:1}","\\StarnoteDataADCElection")
save_kable(tab, file = "Tables/Table 1.tex", escape=F, format="latex")

tab <- make.comparison.groups("Control", "Party elite and voter preferences across candidates (J=0 constituencies)\\label{tab:A3}","\\StarnoteDataADCElection")
save_kable(tab, file = "Tables/Table A3.tex", escape=F, format="latex")

################################################################################
### Table A1: Party elite preferences: Differences between candidates ranked first and second by ADC (by year)
### Table A2: Voter preferences: Differences between election winners and losers (by year)

vars <- vars[vars!="ranked_top"]
naming <- data.frame(var=vars) %>% left_join(label)

sub <- as_tibble(data.frame(bind_rows(sub_1, sub_2, sub_3, sub_4, sub_5, sub_6, sub_7),"","","","",""))
attr(sub, "position") <- c(1,4,11,16,23,26,31)

tab <- make.comparison.split(spec="adc",diff=T,cols=11,
                             t="Party elite preferences: Differences between candidates ranked first and second by ADC (by year)\\label{tab:A1}",starnote="\\StarnoteDataADC",
                             header_input =  c("$\\\\mu_{1}$","$\\\\mu_{2}$"))
save_kable(tab, file = "Tables/Table A1.tex", escape=F, format="latex") 

vars <- c("ranked_top",vars[vars!="winner"])
naming <- data.frame(var=vars) %>% left_join(label)
attr(sub, "position") <- c(1,5,12,17,24,27,32)

tab <- make.comparison.split(spec="election",diff=T,cols=11,
                             t="Voter preferences: Differences between election winners and losers (by year)\\label{tab:A2}",starnote="\\StarnoteDataElection",
                             header_input=c("$\\\\mu_{\\\\text{Winner}}$","$\\\\mu_{\\\\text{Loser}}$"))
save_kable(tab, file = "Tables/Table A2.tex", escape=F, format="latex") 

################################################################################
### Table A5: Balance: Differences between first-ranked candidates assigned J and N (by year)

vars <- vars[!(vars %in% c("ranked_top","vs","votes"))]
naming <- data.frame(var=vars) %>% left_join(label)

sub <- as_tibble(data.frame(bind_rows(sub_1, sub_2, sub_3, sub_4, sub_5, sub_6),"","","","",""))
attr(sub, "position") <- c(1,4,11,16,23,26)

tab <- make.comparison.split(spec="balance",diff=F,cols=15,
                             t="Balance: Differences between first-ranked candidates assigned $J$ and $N$ (by year)\\label{tab:A5}",
                             starnote="\\StarnoteBalanceCand",
                             header_input = c("$\\\\mu_{1 \\\\rightarrow J}$","$\\\\mu_{1 \\\\rightarrow N}$","$\\\\beta^{J}$"))
save_kable(tab, file = "Tables/Table A5.tex", escape=F, format="latex") 

################################################################################
### Table A18: Differences in characteristics of elected candidates as a function of instrument assignment

vars <- c("ranked_top", vars)
naming <- data.frame(var=vars) %>% left_join(label)

info <- as_tibble(bind_rows(lapply(vars, function(x) make.comparison(x,"winner","All")[[2]])))
attr(info, 'position') <- c(2,3,4,5)
colnames(info) <- c("\\specialcell{(1)}","\\specialcell{(2)}","\\specialcell{(3)}","\\specialcell{(4)}")

models <- lapply(vars, function(x) make.comparison(x,"winner","All")[[1]])
names(models) <- paste("\\hspace{0.1cm}", naming$label)

head <- c("","N","$\\\\mu_{1 \\\\rightarrow J}$","$\\\\mu_{1 \\\\rightarrow N}$","$\\\\beta^{FS}$","p-value")

sub <- as_tibble(data.frame(bind_rows(sub_1, sub_2, sub_3, sub_4, sub_5, sub_6),"","","","",""))
attr(sub, "position") <- c(1,5,12,17,24,27) 

tab <- modelsummary(models, escape=F, coef_rename = c("J"="\\specialcell{(5)}"),
                    align="lrrrrl", col.names=gen.col.index(5),
                    add_columns=info, add_rows=sub, estimate="[{p.value}]{stars}", statistic=NULL,
                    fmt=2, format="latex", stars=c('***' = .01, '**' = .05, '*' = .1), gof_map = c("nobs"),
                    title="Differences in characteristics of elected candidates as a function of instrument assignment\\label{tab:A18}",
                    shape=model~term) %>% add_header_above(header = head, escape=F) %>%
  add_footnote("\\StarnoteWinners", threeparttable = T, escape=F, notation="none") %>% 
  kable_styling(position = "center", font_size = 11)
tab <- fix.format(tab)
save_kable(tab, file = "Tables/Table A18.tex", escape=F, format="latex") 

################################################################################
### Table A19: Correlates of ADC vote share among elite-preferred candidates

vars <- vars[!(vars %in% c("ranked_top","adc_vs","adc_votes"))]
naming <- data.frame(var=vars) %>% left_join(label)

info <- as_tibble(bind_rows(lapply(vars, function(x) make.comparison(x,"rank1_vs","All")[[2]])))
attr(info, 'position') <- c(2,3,4,5)
colnames(info) <- c("\\specialcell{(1)}","\\specialcell{(2)}","\\specialcell{(3)}","\\specialcell{(4)}")

models <- lapply(vars, function(x) make.comparison(x,"rank1_vs","All")[[1]])
names(models) <- paste("\\hspace{0.1cm}", naming$label)

head <- c("","N","$\\\\mu_{1}^{\\\\text{Weak}}$","$\\\\mu_{1}^{\\\\text{Strong}}$","$\\\\beta$","p-value")

sub <- as_tibble(data.frame(bind_rows(sub_2, sub_3, sub_4, sub_5, sub_6),"","","","","")) 
attr(sub, "position") <- c(1,8,13,20,23,30)

tab <- modelsummary(models, escape=F, coef_rename = c("z_adc_rank1_vs"="\\specialcell{(5)}"),
                    align="lrrrrl", col.names=gen.col.index(5),
                    add_columns=info, add_rows=sub, estimate="[{p.value}]{stars}", statistic=NULL,
                    fmt=2, format="latex", stars=c('***' = .01, '**' = .05, '*' = .1), gof_map = c("nobs"),
                    title="Correlates of ADC vote share among elite-preferred candidates\\label{tab:A19}",
                    shape=model~term) %>% add_header_above(header = head, escape=F) %>%
  add_footnote("\\StarnoteADCWeakStrong", threeparttable = T, escape=F, notation="none") %>% 
  kable_styling(position = "center", font_size = 11)
tab <- fix.format(tab)
save_kable(tab, file = "Tables/Table A19.tex", escape=F, format="latex") 

################################################################################
### Table A4: Confounding: Differences between constituencies wherein first-ranked candidate was elected and second-ranked candidate was elected
### Table A6: Balance: Differences between constituencies wherein first-ranked candidates assigned J and N (by year)

vars <- c('n_cand','adc_votes_total','reg_voters','n_ps',
          'turnout','pres_vs','n_meetings',
          'c57_pop','area','longitude','latitude', 
          'd_road','d_road_any','d_rail','d_town_major','d_town_any','d_afcap',
          'n_pre_pri','n_pre_sec','n_pre_disp','n_pre_health','n_pre_wp')
naming <- data.frame(var=vars) %>% left_join(label)

info <- as_tibble(bind_rows(lapply(vars, function(x) make.comparison(x,"ols","All")[[2]])))
attr(info, 'position') <- c(2,3,4,5)
colnames(info) <- c("\\specialcell{(1)}","\\specialcell{(2)}","\\specialcell{(3)}","\\specialcell{(4)}")

models <- lapply(vars, function(x) make.comparison(x,"ols","All")[[1]])
names(models) <- paste("\\hspace{0.1cm}", naming$label)

head <- c("","N","$\\\\mu_{\\\\text{Winner}=1}$","$\\\\mu_{\\\\text{Winner}=2}$","$\\\\beta$","p-value")

sub_a <- tibble(name="\\textbf{Election}")
sub_b <- tibble(name="\\textbf{Geography}")
sub_c <- tibble(name="\\textbf{Average distance to infrastructure}")
sub_d <- tibble(name="\\textbf{Existing local public goods}")

sub <- as_tibble(data.frame(bind_rows(sub_a, sub_b, sub_c, sub_d),"","","","",""))
attr(sub, "position") <- c(1,9,14,21)

tab <- modelsummary(models, escape=F,coef_rename = c("rank_top"="\\specialcell{(5)}"),align="lrrrrl",col.names=gen.col.index(5),
                    add_columns=info,add_rows=sub,
                    estimate="[{p.value}]{stars}", statistic=NULL,
                    fmt=2, format="latex",stars=c('***' = .01, '**' = .05, '*' = .1),
                    gof_map = c("nobs"),position="!htpb",
                    title="Confounding: Differences between constituencies wherein first-ranked candidate was elected and second-ranked candidate was elected\\label{tab:A4}",
                    shape=model~term) %>% add_header_above(header = head, escape=F) %>%
  add_footnote("\\StarnoteConstituency", threeparttable = T, escape=F, notation="none") %>% 
  kable_styling(position = "center", font_size = 11)
tab <- fix.format(tab)
save_kable(tab, file = "Tables/Table A4.tex", escape=F, format="latex") 

vars <- vars[vars!="n_meetings"]
naming <- data.frame(var=vars) %>% left_join(label)

tab <- make.comparison.split(spec="balance", diff=F, cols=15,
                             t="Balance: Differences between constituencies wherein first-ranked candidates assigned $J$ and $N$ (by year)\\label{tab:A6}",
                             starnote="\\StarnoteBalanceConstit",
                             header_input = c("$\\\\mu_{1 \\\\rightarrow J}$","$\\\\mu_{1 \\\\rightarrow N}$","$\\\\beta^{J}$"))
save_kable(tab,file = "Tables/Table A6.tex", escape=F, format="latex") 

################################################################################
### Table A7: Exclusion: Differences in competitiveness of election races wherein first-ranked candidates assigned J and N

vars <- c('vs','votes','turnout','pres_vs','n_meetings')
naming <- data.frame(var=vars) %>% left_join(label)

info <- as_tibble(bind_rows(lapply(vars, function(x) make.comparison(x,"winner","All")[[2]])))
attr(info, 'position') <- c(2,3,4,5)
colnames(info) <- c("\\specialcell{(1)}","\\specialcell{(2)}","\\specialcell{(3)}","\\specialcell{(4)}")

models <- lapply(vars, function(x) make.comparison(x,"winner","All")[[1]])
names(models) <- paste("\\hspace{0.1cm}", naming$label) 
head <- c("","N","$\\\\mu_{1 \\\\rightarrow J}$","$\\\\mu_{1 \\\\rightarrow N}$","$\\\\beta^{FS}$","p-value")

tab <- modelsummary(models, escape=F, coef_rename = c("J"="\\specialcell{(5)}"),align="lrrrrl",add_columns=info,col.names=gen.col.index(5),
                    estimate="[{p.value}]{stars}", statistic=NULL, fmt=2, 
                    format="latex",stars=c('***' = .01, '**' = .05, '*' = .1),
                    gof_map = c("nobs"),position="!htpb",
                    title="Exclusion: Differences in competitiveness of election races wherein first-ranked candidates assigned $J$ and $N$\\label{tab:A7}",
                    shape=model~term) %>% add_header_above(header = head, escape=F) %>%
  add_footnote("\\StarnoteExclusion", threeparttable = T, escape=F, notation="none") %>% 
  kable_styling(position = "center", font_size = 11)
tab <- fix.format(tab)
save_kable(tab, file = "Tables/Table A7.tex", escape=F, format="latex") 

################################################################################
### Figure A3: Sincerity of ADC ranking behavior

vars_ed <- c('ed_foreign','ed_secondary','ed_university')
vars_exp <- c('occ_main_bureaucrat_senior','incumbent','minister')

select_ed <- d %>% filter(rank<=3) %>% group_by(rank) %>% summarise(across(all_of(vars_ed), mean)) %>% 
  pivot_longer(2:4) %>% mutate(grp="Education") %>% left_join(label, by=c("name"="var"))
select_exp <- d %>% filter(rank<=3) %>% group_by(rank) %>% summarise(across(all_of(vars_exp), mean)) %>% 
  pivot_longer(2:4) %>% mutate(grp="Political experience") %>% left_join(label, by=c("name"="var"))

make.sincere.plot <- function(df,yl){
  p <- df %>% 
    ggplot(aes(x=rank, y=value, group=label, color=label))+theme_bw()+
    geom_line()+geom_point()+facet_grid(cols=vars(grp))+
    theme(strip.background=element_rect(fill="white"), legend.title = element_blank())+
    scale_x_continuous(breaks=c(1,2,3))+ylim(c(0,0.6))+xlab("ADC rank")+ylab(yl)+
    scale_color_manual(values=wes_palette(n=3, name="Darjeeling1"))+theme(legend.position = c(0.7,0.82))
  return(p)
}

p <- egg::ggarrange(make.sincere.plot(select_ed,"Share of candidates with characteristic"),
                    make.sincere.plot(select_exp,NULL), nrow=1)
ggsave(filename = "Figures/Figure A3.pdf", plot=p, width=6.5, height=3.5)

################################################################################
### Figure A5: Validating administrative data on primary schools
### Figure A6: Provision of different local public goods by year, 1960-1980

lpg <- fread("Data/lpg.csv")

gen.p <- function(t){
  p <- ggplot(data=lpg %>% filter(y%in%1960:1980 & type==t) %>% group_by(y,type) %>% summarise(n=sum(n)),
              aes(x=y,y=n))+theme_bw()+
    facet_grid(cols=vars(type))+xlab(NULL)+geom_line()+ylab("Number of facilities founded")+
    geom_point()+scale_y_continuous(limits=c(0,NA))+scale_x_continuous(breaks=seq(1960,1980,2))+
    theme(strip.background=element_rect(fill="white"), legend.position="bottom")
  return(p)  
}

p1 <- gen.p("Primary school")
p2 <- gen.p("Secondary school")
p3 <- gen.p("Dispensary")
p4 <- gen.p("Health facility")
p5 <- gen.p("Water point")

p <- egg::ggarrange(p1,p2,p3,p4,p5,nrow=2)
ggsave(file="Figures/Figure A6.pdf",plot=p,width=12,height=5)

sch <- fread("Data/schools_validation.csv")

lpg <- lpg %>% filter(type=="Primary school") %>% filter(y<=1967) %>% group_by(constituency=constituency_1965) %>% summarise(n_admin=sum(n)) %>% mutate(election=1965, year=1967) %>% 
  bind_rows(lpg %>% filter(type=="Primary school") %>% filter(y<=1973) %>% group_by(constituency=constituency_1970) %>% summarise(n_admin=sum(n)) %>% mutate(election=1970, year=1973)) %>% 
  left_join(shp %>% st_drop_geometry() %>% select(year,constituency=constit, district), by=c("election"="year","constituency")) %>% filter(!is.na(district)) %>% 
  group_by(district, election, year) %>% summarise(n_admin=sum(n_admin)) %>% 
  left_join(sch, by=c("district","election","year"))

p <- ggplot(data=lpg, aes(x=n_historical, y=n_admin))+theme_bw()+
  facet_grid(cols=vars(year))+
  geom_point(alpha=0.75)+geom_smooth(method="lm",colour="cadetblue", fullrange=T, se=F)+geom_abline(aes(intercept=0,slope=1),lty=2)+
  theme(strip.background=element_rect(fill="white"))+scale_x_log10(limits=c(5,500))+scale_y_log10(limits=c(5,500))+
  xlab("Number of schools reported (historical data)")+ylab("Number of schools existing in 1967 or 1973\n(administrative data)")

ggsave(filename="Figures/Figure A5.pdf",plot=p,width=7,height=3.75)

################################################################################
### Figures A7-A9, A10-A12: Estimation of first stage and instrumental variables 

d <- read_dta("Data/analysis_2.dta")

### Figure A7: Distribution of outcome measures

sum <- d %>% select(starts_with("ln_n_post_")) %>% pivot_longer(cols=everything()) %>% 
  mutate(label = case_when(name=="ln_n_post_disp"~"Dispensaries", name=="ln_n_post_health"~"Health facilities", name=="ln_n_post_pri"~"Primary schools", 
                           name=="ln_n_post_sec"~"Secondary schools", name=="ln_n_post_wp"~"Water points", name=="ln_n_post_other"~"Other local public goods\n(non-primary schools)")) %>% 
  filter(!is.na(label)) %>% 
  mutate(label = factor(label, levels=c("Primary schools","Other local public goods\n(non-primary schools)","Dispensaries","Health facilities","Secondary schools","Water points"))) %>% 
  mutate(value=exp(value))

p1 <- ggplot(data=sum %>% filter(label %in% c("Primary schools","Other local public goods\n(non-primary schools)")), aes(x=value))+theme_bw()+geom_histogram()+
  facet_grid(cols=vars(label)) + 
  theme(strip.background=element_rect(fill="white")) + ylab(NULL)+xlab(NULL)+
  scale_x_log10()+scale_y_continuous(limits=c(0,30))

p2 <- ggplot(data=sum %>% filter(!(label %in% c("Primary schools","Other local public goods\n(non-primary schools)"))), aes(x=value))+theme_bw()+geom_histogram()+
  facet_grid(cols=vars(label)) + 
  theme(strip.background=element_rect(fill="white")) + ylab(NULL)+xlab(NULL)+
  scale_x_log10()+scale_y_continuous(limits=c(0,100))

ggsave(file="Figures/Figure A7.pdf",plot=ggarrange(p1, p2, nrow=2), width=8,height=6)

################################################################################
### Figure A8: Spatial distribution of outcome measures

p <- shp %>% left_join(d %>% select(year, constit=constituency, ln_n_post_pri, ln_n_post_other), 
                           by=c("constit","year")) %>% 
  select(year, constit, in_samp, ln_n_post_pri:ln_n_post_other)

p1 <- p %>% ggplot()+theme_bw()+facet_grid(cols=vars(year))+
  geom_sf(aes(fill=ln_n_post_pri))+
  scale_fill_viridis(option="viridis", name = "Log primary schools")+ 
  theme(strip.background=element_rect(fill="white"), legend.position="bottom")

p2 <- p %>% ggplot()+theme_bw()+facet_grid(cols=vars(year))+
  geom_sf(aes(fill=ln_n_post_other))+
  scale_fill_viridis(option="viridis", name = "Log other public goods")+ 
  theme(strip.background=element_rect(fill="white"), legend.position="bottom")

ggsave("Figures/Figure A8.pdf",plot=ggarrange(p1, p2, ncol=1),width=6,height=7.5)

################################################################################
### Figure A9: Estimates of first stage while excluding districts or regions
### Figure A12: Baseline estimates from Table 5 while excluding districts or regions

unique_d <- unique(d$district_year)
unique_r <- unique(d$region_year)

make.loo <- function(dv,spec,exclude){
  x <- d %>% dplyr::select(var=any_of(dv), J, treat, year, region_year, district_year)
  unique_d <- unique(x$district_year)
  unique_r <- unique(x$region_year)
  
  if(spec=="District"){x <- x %>% filter(!(district_year %in% unique_d[exclude]))}  
  if(spec=="Region"){x <- x %>% filter(!(region_year %in% unique_r[exclude]))}  
  
  out <- feols(var ~ J | year, data=x, vcov="hetero", notes=F)
  coef <- out$coefficients[1]
  ci_lower <- coef-(out$se*1.96)
  ci_upper <- coef+(out$se*1.96)
  
  return(data.frame(spec,exclude,coef,ci_lower,ci_upper))
}

make.loo.plot <- function(spec, input_df, x_lab){
  p <- ggplot(data=input_df, aes(x=index))+theme_bw()+
    geom_errorbar(aes(ymin=ci_lower, ymax=ci_upper), width=0)+geom_point(aes(y=coef))+
    geom_hline(aes(yintercept=0),lty=2,alpha=0.5)+
    xlab(x_lab)+theme(strip.background=element_rect(fill="white"))
  if(spec=="fs"){p <- p+ylab(bquote(beta^FS))+ylim(c(0,0.5))}
  if(spec=="iv"){p <- p+ylab(bquote(beta^FS))+ylim(c(-2,3))+facet_grid(cols=vars(outcome))}
  return(p)
}

out_d <- bind_rows(lapply(1:length(unique_d), function(x) make.loo("treat","District",x))) %>% arrange(coef) %>% mutate(index=1:length(exclude))
out_r <- bind_rows(lapply(1:length(unique_r), function(x) make.loo("treat","Region",x))) %>% arrange(coef) %>% mutate(index=1:length(exclude))

p_d <- make.loo.plot("fs", out_d, "Excluded district-year")
p_r <- make.loo.plot("fs", out_r, "Excluded region-year")

ggsave(filename="Figures/Figure A9.pdf", plot=ggarrange(p_d, p_r,nrow=1), width=7.5, height=3.5)

iv_results <- fread("Data/iv_jackknife.csv")

out_d <- iv_results %>% filter(spec=="District") %>% arrange(coef) %>% group_by(outcome) %>% mutate(index=1:length(excluded))
out_d$outcome <- factor(out_d$outcome, levels=c("Primary schools","Other local public goods"))

out_r <- iv_results %>% filter(spec=="Region") %>% arrange(coef) %>% group_by(outcome) %>% mutate(index=1:length(excluded))
out_r$outcome <- factor(out_r$outcome, levels=c("Primary schools","Other local public goods"))

p_d <- make.loo.plot("iv", out_d, "Excluded district-year")
p_r <- make.loo.plot("iv", out_r, "Excluded region-year")

ggsave(filename="Figures/Figure A12.pdf", plot=ggarrange(p_d, p_r), width=8, height=6)

################################################################################
### Figure A10: Heterogeneity in estimated first stage effect
### Figure A11: Correlates of first stage effect heterogeneity

p <- d %>% ggplot(aes(x=wgt))+theme_bw()+
  xlab("Predicted first stage coefficient")+geom_histogram(binwidth=0.005)+ylab(NULL)
ggsave(file="Figures/Figure A10.pdf", plot=p, width=4, height=4)

vars <- c("reg_voters","n_ps","n_cand","adc_votes_total","male","incumbent","minister","adc_votes","adc_diff",
          "ed_secondary","ed_university","ed_vocational","ed_foreign","born_in_district","trad_authority",
          "leader_union_coop","leader_local_appointed","leader_local_elected",
          "occ_main_farmer","occ_main_teacher","occ_main_private","occ_main_bureaucrat_junior","occ_main_bureaucrat_senior",
          "c67_christian","c67_muslim","c67_muslim_majority","c67_christian_majority",
          "c67_eth1_share","c67_eth2_share","c67_eth3_share","c67_ethSukuma_share","c67_ethNyamwezi_share","c67_ethNyamweziSukuma_share",
          "ln_n_pre_pri","ln_n_pre_sec","ln_n_pre_disp","ln_n_pre_health","ln_n_pre_wp",
          "longitude","latitude","area","d_road","d_road_any","d_rail","d_town_major","d_afcap","d_town_any") 

gen.corr <- function(wgt, dv){
  out <- feols(as.formula(paste(wgt, "~ scale(", dv,")")), data=d)
  return(data.frame(dv,coef=out$coefficients[2],wgt))
}

corr <- bind_rows(lapply(vars, function(x) gen.corr("wgt", x)))
corr <- corr %>% arrange(-abs(coef)) %>% filter(row_number()<=20)

corr <- left_join(corr, label, by=c("dv"="var"))
corr$label <- gsub("km\\$\\^2\\$","sq. km", corr$label)
vars_cand <- c("born_in_district","ed_vocational","ed_university","occ_main_teacher","occ_main_private","occ_main_farmer")
corr$label[corr$dv %in% vars_cand] <- paste("Candidate:", corr$label[corr$dv %in% vars_cand])
corr$label <- fct_rev(factor(corr$label, levels=unique(corr$label)))

p <- ggplot(data=corr, aes(x=coef, y=label))+theme_bw()+
  geom_point()+geom_vline(aes(xintercept=0),lty=2,colour="grey30")+
  xlab("Standardized correlation with\npredicted treatment effect")+ylab(NULL)
ggsave("Figures/Figure A11.pdf",plot=p,width=4.5,height=4)

################################################################################
### Figure A1: Global distribution of legislative candidate selection methods

vd <- vdemdata::vdem
vp <- vdemdata::vparty

vp <- vp %>% group_by(year,country_name) %>% 
  mutate(ruling=as.numeric(v2paseatshare==max(v2paseatshare))) %>% 
  mutate(decade=case_when(year%in%1970:1979~"1970s",year%in%1980:1989~"1980s",year%in%1990:1999~"1990s",year%in%2000:2009~"2000s",year%in%2010:2019~"2010s")) %>% 
  filter(!is.na(decade))

sum_1 <- vp %>% filter(!is.na(v2panom_ord)) %>% group_by(decade,v2panom_ord) %>% 
  dplyr::summarize(n=n()) %>% ungroup() %>% tidyr::complete(decade, v2panom_ord, fill=list(n=0)) %>% 
  group_by(decade) %>% mutate(sum_n=sum(n), share=n/sum_n) %>% 
  mutate(method=case_when(v2panom_ord==0~"Party leader", v2panom_ord==1~"National party elites", v2panom_ord==2~"Local party elites", v2panom_ord==3~"All party members")) %>% 
  mutate(type="All parties")

sum_2 <- vp %>% filter(!is.na(v2panom_ord) & ruling==1) %>% group_by(decade,v2panom_ord) %>% 
  dplyr::summarize(n=n()) %>% ungroup() %>% tidyr::complete(decade, v2panom_ord, fill=list(n=0)) %>% 
  group_by(decade) %>% mutate(sum_n=sum(n), share=n/sum_n) %>% 
  mutate(method=case_when(v2panom_ord==0~"Party leader", v2panom_ord==1~"National party elites", v2panom_ord==2~"Local party elites", v2panom_ord==3~"All party members")) %>% 
  mutate(type="Ruling parties")

sum <- bind_rows(sum_1, sum_2)

p <- ggplot(data=sum %>% filter(!is.na(method)), aes(x=decade,y=share,group=factor(method),color=factor(method)))+
  geom_line()+geom_point()+theme_bw()+scale_color_viridis(discrete=T)+xlab(NULL)+ylab("Share of party-years")+
  facet_grid(cols=vars(type))+theme(legend.title = element_blank(), legend.position="bottom")+ylim(c(0,0.65)) +
  theme(strip.background=element_rect(fill="white"))
ggsave("Figures/Figure A1.pdf", plot=p, width=8, height=4)

################################################################################
### Figure A2: Cross-national scope conditions

vars_vd <- c("v2xnp_regcorr","v2xnp_pres")
vd_sum <- vd %>% filter(year>1960 & year<=1990) %>%
  group_by(country_name,country_text_id,e_regionpol_6C) %>% 
  summarise(across(vars_vd, function(x) mean(na.omit(x)))) 

vars_vp <- c("v2pariglef")
vp_sum <- vp %>% filter(year>1960 & ruling==1 & year<=1990) %>%
  group_by(country_name,country_text_id,e_regionpol_6C) %>% 
  summarise(across(vars_vp, function(x) mean(na.omit(x))))

sum <- vd_sum %>% left_join(vp_sum, by=c("country_name", "country_text_id", "e_regionpol_6C"))

sum <- sum %>% mutate(region=case_when(e_regionpol_6C==1~"Eastern Europe and Central Asia", e_regionpol_6C==2~"Latin America and Caribbean", e_regionpol_6C==3~"Middle East and North Africa", 
                                       e_regionpol_6C==4~"Sub-Saharan Africa", e_regionpol_6C==5~"Western Europe and North America", e_regionpol_6C==6~"Asia-Pacific"),
                      tz = as.numeric(country_name=="Tanzania"))

sum$v2pariglef <- scale(sum$v2pariglef)[,1]
sum$v2xnp_regcorr <- scale(sum$v2xnp_regcorr)[,1]

p <- sum %>% ggplot(aes(x=v2xnp_regcorr,y=v2pariglef,color=v2xnp_pres))+theme_bw() +
  geom_label(data=sum%>%filter(country_text_id=="TZA"), aes(label=country_text_id),size=3) +
  geom_text(aes(label=country_text_id),size=3)+ 
  scale_color_viridis(discrete=F, option="mako")+ 
  facet_wrap(~region)+xlab("Regime corruption index")+ylab("Redistributive policy")+
  theme(strip.background=element_rect(fill="white"), legend.position="bottom")+
  labs(color = "Presidentialism")

ggsave(file="Figures/Figure A2.pdf", plot=p, width=8, height=5.5)

################################################################################
### Figure A14: Central government capital allocations across public goods

d <- fread("Data/five_year_plans.csv") %>% 
  mutate(category=factor(category, levels=unique(category)), 
         Discretion=factor(Discretion, levels=c("Low","Medium","High")))

p <- d %>% ggplot(aes(x=category, y=share, group=Discretion, fill=Discretion))+theme_bw()+geom_col() + 
  facet_grid(rows=vars(plan_lab))+theme(strip.background=element_rect(fill="white"))+
  ylab("Share of total planned\ncentral government capital expenditure")+xlab(NULL)+
  scale_fill_viridis(discrete=T, end=0.8)

ggsave("Figures/Figure A14.pdf",p,width=7.5,height=5)

################################################################################
### Figure A15: Distribution of distance to facilities in HRDS data (1993)

d <- read_dta("Data/analysis_4.dta")

p <- d %>% filter(index==1) %>%
  pivot_longer(cols = matches("_dist_")) %>%
  mutate(value = exp(value),
         lab = case_when(name == "ln_dist_pri" ~ "Primary school", name == "ln_dist_sec" ~ "Secondary school",
                         name == "ln_dist_disp" ~ "Dispensary", name == "ln_dist_hosp" ~ "Health facility",
                         name == "ln_dist_wp" ~ "Water point"),
         lab = factor(lab, levels = unique(lab))) %>%
  group_by(lab) %>%
  mutate(av = mean(na.omit(value))) %>%
  ggplot(aes(x=value))+theme_bw()+facet_grid(cols=vars(lab))+scale_x_log10(limits=c(50,50000))+
    geom_histogram(binwidth=0.15)+geom_vline(aes(xintercept=av),lty=2,color="cadetblue")+
    theme(strip.background=element_rect(fill="white")) + ylab(NULL) + xlab("Distance to closest facility (m)")

ggsave("Figures/Figure A15.pdf",p,width=9,height=3.5)

